# -*- coding: utf-8 -*-
import copy
import json

import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest

from zc_core.model.items import Box
from zc_core.util.http_util import retry_request
from zc_core.spiders.base import BaseSpider
from huiemall.rules import *


class CatalogSpider(BaseSpider):
    """Spider that collects the category (catalog) tree of huiemall.com.

    Flow: ``start_requests`` fetches the goods-search HTML page, whose menu
    categories are parsed in ``parse_index_catalog``; that callback then POSTs
    to the search API and ``parse_search_catalog`` emits the resulting
    categories as a ``Box('catalog', ...)`` item.

    The supplier callbacks (``parse_total_supplier`` / ``parse_supplier``) are
    kept, but the request that would kick them off is currently disabled in
    ``start_requests``.

    NOTE: several methods share their name with a parsing helper pulled in by
    ``from huiemall.rules import *``. Inside a method body the bare name
    resolves to the module-level helper (class scope is not searched), so
    ``parse_supplier(response)`` calls the rules helper, not the method.
    ``self.batch_no`` and ``self.error_back`` are not defined here; they are
    presumably provided by ``BaseSpider`` -- verify against zc_core.
    """

    name = 'catalog'
    custom_settings = {
        'CONCURRENT_REQUESTS': 6,
        'DOWNLOAD_DELAY': 0.1,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 6,
        'CONCURRENT_REQUESTS_PER_IP': 6,
    }

    # Frequently used endpoints.
    search_index_url = 'http://www.huiemall.com/HeFei/goodssearch.html?page=1&sort=salesvolume_desc'
    search_api_url = 'http://www.huiemall.com/EpointMallHeFeiService/rest/product/search'
    shop_list_url = 'http://www.huiemall.com/EpointMallHeFeiService/rest/spgys/getsearchgys'
    sku_list_url = 'http://www.huiemall.com/EpointMallHeFeiService/rest/spgys/getproducts'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Forward ``batchNo`` and any extra spider arguments to ``BaseSpider``."""
        super(CatalogSpider, self).__init__(batchNo=batchNo, *args, **kwargs)

    def start_requests(self):
        """Yield the initial request for the goods-search index page."""
        # Categories: load the HTML search page that carries the category menu.
        yield Request(
            url=self.search_index_url,
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
            },
            callback=self.parse_index_catalog,
            errback=self.error_back,
            # priority=100,
            dont_filter=True,
            headers={
                'Accept-Encoding': 'gzip, deflate, sdch',
                'Accept-Language': 'en-US,en;q=0.8',
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Referer': 'http://www.wikipedia.org/',
                'Connection': 'keep-alive',
            }
        )
        # Suppliers: disabled. Re-enable this request to drive
        # parse_total_supplier / parse_supplier.
        # yield Request(
        #     method='POST',
        #     url=self.shop_list_url,
        #     headers={
        #         'Accept': 'application/json, text/javascript, */*; q=0.01',
        #         'Content-Type': 'application/json',
        #     },
        #     body=json.dumps({
        #         "page": "1",
        #         "size": "16",
        #         "url": "http://www.huiemall.com/HeFei/gyssearch.html?page=1",
        #     }),
        #     meta={
        #         'reqType': 'supplier',
        #         'batchNo': self.batch_no,
        #     },
        #     callback=self.parse_total_supplier,
        #     errback=self.error_back,
        #     priority=10,
        #     dont_filter=True
        # )

    def parse_index_catalog(self, response):
        """Parse the menu categories and request the search API.

        The parsed categories travel to the next callback through
        ``meta['cats']``.
        """
        # Bare name -> module-level helper from huiemall.rules, not this method.
        cats = parse_index_catalog(response)
        self.logger.info('菜单品类: count[%s]', len(cats))

        # Add the fixed first-level categories. The return value is unused, so
        # the helper presumably mutates ``cats`` in place -- verify in rules.
        add_fixed_catalog(cats)

        # Search-API payload. Built once so the logged value and the request
        # body cannot drift apart; key order is kept because it determines the
        # serialized body.
        payload = {
            "productname": "", "brandname": "", "cid": "", "price": "", "avgprice": "", "sort": "salesvolume_desc",
            "jnhb": "", "jinkou": "", "page": "1", "size": 12, "isdingdian": "",
            "platformurl": "http://www.huiemall.com/HeFei/goodssearch.html?productname=",
            "referer": "http://www.huiemall.com/HeFei/goodssearch.html?productname=",
        }
        # Goes through the spider logger instead of a stray print() to stdout.
        self.logger.debug('search payload: %s', payload)

        yield Request(
            method='POST',
            url=self.search_api_url,
            headers={
                'Accept': 'application/json, text/javascript, */*; q=0.01',
                'Content-Type': 'application/json;charset=UTF-8',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
                'X-Requested-With': 'XMLHttpRequest',
            },
            body=json.dumps(payload),
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
                # Shallow copy: the request gets its own container object.
                'cats': copy.copy(cats),
            },
            callback=self.parse_search_catalog,
            errback=self.error_back,
            priority=200,
        )

    def parse_search_catalog(self, response):
        """Emit the category list parsed from the search API response."""
        # Category list (module-level helper from huiemall.rules).
        cats = parse_search_catalog(response)
        if cats:
            self.logger.info('品类: count[%s]', len(cats))
            yield Box('catalog', self.batch_no, cats)

    def parse_total_supplier(self, response):
        """Read the supplier page count and request every supplier page."""
        # Total number of supplier pages (module-level helper from rules).
        total = parse_total_supplier(response)
        self.logger.info('供应商总页数: total[%s]', total)

        if not total:
            return

        # One POST per supplier page, 1..total inclusive.
        for page in range(1, total + 1):
            page_str = str(page)
            # The API body carries the same page URL as "url" and "referer".
            page_url = "http://www.huiemall.com/HeFei/gyssearch.html?page={}".format(page_str)
            yield Request(
                method='POST',
                url=self.shop_list_url,
                headers={
                    'Accept': 'application/json, text/javascript, */*; q=0.01',
                    'Content-Type': 'application/json',
                },
                body=json.dumps({
                    "page": page_str,
                    "size": "16",
                    "url": page_url,
                    "referer": page_url,
                }),
                meta={
                    'reqType': 'supplier',
                    'page': page_str,
                    'batchNo': self.batch_no,
                },
                callback=self.parse_supplier,
                errback=self.error_back,
                priority=20,
                dont_filter=True
            )

    def parse_supplier(self, response):
        """Emit the suppliers parsed from one supplier-list page."""
        page = response.meta.get('page')
        # Supplier list for this page (module-level helper from rules).
        suppliers = parse_supplier(response)
        if suppliers:
            self.logger.info('供应商: page[%s], count[%s]', page, len(suppliers))
            yield Box('supplier', self.batch_no, suppliers)